Project 1 DATA1901

Code
library(readxl)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Code
library(rafalib)
library(plotly)

Attaching package: 'plotly'

The following object is masked from 'package:ggplot2':

    last_plot

The following object is masked from 'package:stats':

    filter

The following object is masked from 'package:graphics':

    layout
Code
library(dplyr)

pooled_data = read_excel("data.xlsx", "pooled123")

Executive Summary

Explorary Data Analysis (EDA)

Structures

Limitations

Assumptions

Research Question and Analysis

Research Question

RQ1 : Which specific side effects increase the most after VR exposure?

Analysis

Code
BSSQ <- data.frame(
  label = c("BSSQ_1","BSSQ_2","BSSQ_3","BSSQ_4",
            "BSSQ_5","BSSQ_6","BSSQ_7","BSSQ_8",
            "BSSQ_9","BSSQ_10","BSSQ_11","BSSQ_12",
            "BSSQ_13","BSSQ_14","BSSQ_15","BSSQ_16"),
  sum   = c(sum(pooled_data$BSSQ_1), sum(pooled_data$BSSQ_2),
            sum(pooled_data$BSSQ_3), sum(pooled_data$BSSQ_4),
            sum(pooled_data$BSSQ_5), sum(pooled_data$BSSQ_6),
            sum(pooled_data$BSSQ_7), sum(pooled_data$BSSQ_8),
            sum(pooled_data$BSSQ_9), sum(pooled_data$BSSQ_10),
            sum(pooled_data$BSSQ_11), sum(pooled_data$BSSQ_12),
            sum(pooled_data$BSSQ_13), sum(pooled_data$BSSQ_14),
            sum(pooled_data$BSSQ_15), sum(pooled_data$BSSQ_16))
)

ASSQ <- data.frame(
  label = c("ASSQ_1","ASSQ_2","ASSQ_3","ASSQ_4",
            "ASSQ_5","ASSQ_6","ASSQ_7","ASSQ_8",
            "ASSQ_9","ASSQ_10","ASSQ_11","ASSQ_12",
            "ASSQ_13","ASSQ_14","ASSQ_15","ASSQ_16"),
  sum   = c(sum(pooled_data$ASSQ_1), sum(pooled_data$ASSQ_2),
            sum(pooled_data$ASSQ_3), sum(pooled_data$ASSQ_4),
            sum(pooled_data$ASSQ_5), sum(pooled_data$ASSQ_6),
            sum(pooled_data$ASSQ_7), sum(pooled_data$ASSQ_8),
            sum(pooled_data$ASSQ_9), sum(pooled_data$ASSQ_10),
            sum(pooled_data$ASSQ_11), sum(pooled_data$ASSQ_12),
            sum(pooled_data$ASSQ_13), sum(pooled_data$ASSQ_14),
            sum(pooled_data$ASSQ_15), sum(pooled_data$ASSQ_16))
)

BSSQ$label <- factor(BSSQ$label, levels = BSSQ$label)
ASSQ$label <- factor(ASSQ$label, levels = ASSQ$label)

diff <- data.frame(
  label = c("ASSQ_1 - BSSQ_1","ASSQ_2 - BSSQ_2","ASSQ_3 - BSSQ_3","ASSQ_4 - BSSQ_4","ASSQ_5 - BSSQ_5","ASSQ_6 - BSSQ_6","ASSQ_7 - BSSQ_7","ASSQ_8 - BSSQ_8", "ASSQ_9 - BSSQ_9", "ASSQ_10 - BSSQ_10", "ASSQ_11 - BSSQ_11", "ASSQ_12 - BSSQ_12", "ASSQ_13 - BSSQ_13", "ASSQ_14 - BSSQ_14", "ASSQ_15 - BSSQ_15","ASSQ_16 - BSSQ_16"),
  diff = ASSQ$sum - BSSQ$sum
)
diff$label <- factor(diff$label, levels = diff$label)


plot_overall <- ggplot(diff, aes(x = label, y = diff, fill = label,
                         text = paste(label, diff, sep=": "))) +
  geom_bar(stat = "identity") +
  theme_minimal() +
  theme(
    axis.text.x  = element_blank(),
    axis.title.x = element_blank(),
    plot.title = element_text(hjust = 0.5)
  ) +
  labs(y = "ASSQ - BSSQ", title = "Difference between ASSQ and BSSQ")
Code
ggplotly(plot_overall, tooltip = "text")

Based on figure 1, the general discomfort has the most significant increase among all side effects for people who don’t see the social model. Interestingly, there is a negative difference which means, after VR, it reduces some people fatigue.

Code
happy <- pooled_data %>%
  group_by(happy) %>%
  reframe(diff2 = sum(ASSQ_3) - sum(BSSQ_3), people = n(), average = diff2 / people)
happy
# A tibble: 8 × 4
  happy diff2 people average
  <chr> <dbl>  <int>   <dbl>
1 3         1      1   1    
2 4         0      4   0    
3 5        20     31   0.645
4 6        28     42   0.667
5 7        30     61   0.492
6 8        19     38   0.5  
7 9         8     35   0.229
8 NA       71    124   0.573
Code
pooled_data_male = filter(pooled_data, gender == "Male")

BSSQ_male = c(sum(pooled_data_male$BSSQ_1), sum(pooled_data_male$BSSQ_2), sum(pooled_data_male$BSSQ_3), sum(pooled_data_male$BSSQ_4), sum(pooled_data_male$BSSQ_5), sum(pooled_data_male$BSSQ_6), sum(pooled_data_male$BSSQ_7), sum(pooled_data_male$BSSQ_8), sum(pooled_data_male$BSSQ_9), sum(pooled_data_male$BSSQ_10), sum(pooled_data_male$BSSQ_11), sum(pooled_data_male$BSSQ_12), sum(pooled_data_male$BSSQ_13), sum(pooled_data_male$BSSQ_14), sum(pooled_data_male$BSSQ_15), sum(pooled_data_male$BSSQ_16))

ASSQ_male = c(sum(pooled_data_male$ASSQ_1), sum(pooled_data_male$ASSQ_2), sum(pooled_data_male$ASSQ_3), sum(pooled_data_male$ASSQ_4), sum(pooled_data_male$ASSQ_5), sum(pooled_data_male$ASSQ_6), sum(pooled_data_male$ASSQ_7), sum(pooled_data_male$ASSQ_8), sum(pooled_data_male$ASSQ_9), sum(pooled_data_male$ASSQ_10), sum(pooled_data_male$ASSQ_11), sum(pooled_data_male$ASSQ_12), sum(pooled_data_male$ASSQ_13), sum(pooled_data_male$ASSQ_14), sum(pooled_data_male$ASSQ_15), sum(pooled_data_male$ASSQ_16))

diff_male <- data.frame(
  label = c("ASSQ_1 - BSSQ_1","ASSQ_2 - BSSQ_2","ASSQ_3 - BSSQ_3","ASSQ_4 - BSSQ_4","ASSQ_5 - BSSQ_5","ASSQ_6 - BSSQ_6","ASSQ_7 - BSSQ_7","ASSQ_8 - BSSQ_8", "ASSQ_9 - BSSQ_9", "ASSQ_10 - BSSQ_10", "ASSQ_11 - BSSQ_11", "ASSQ_12 - BSSQ_12", "ASSQ_13 - BSSQ_13", "ASSQ_14 - BSSQ_14", "ASSQ_15 - BSSQ_15","ASSQ_16 - BSSQ_16"),
  diff = ASSQ_male - BSSQ_male
)

diff_male$label <- factor(diff_male$label, levels = diff_male$label)

plot_male <- ggplot(diff_male, aes(x = label, y = diff, fill = label,
                         text = paste(label, diff, sep=": "))) +
  geom_bar(stat = "identity") +
  theme_minimal() +
  theme(
    axis.text.x  = element_blank(),
    axis.title.x = element_blank(),
    plot.title = element_text(hjust = 0.5)
  ) +
  labs(y = "ASSQ - BSSQ", title = "Difference between ASSQ and BSSQ in Male")

ggplotly(plot_male, tooltip = "text")
Code
pooled_data_female = filter(pooled_data, gender == "Female")

BSSQ_female = c(sum(pooled_data_female$BSSQ_1), sum(pooled_data_female$BSSQ_2), sum(pooled_data_female$BSSQ_3), sum(pooled_data_female$BSSQ_4), sum(pooled_data_female$BSSQ_5), sum(pooled_data_female$BSSQ_6), sum(pooled_data_female$BSSQ_7), sum(pooled_data_female$BSSQ_8), sum(pooled_data_female$BSSQ_9), sum(pooled_data_female$BSSQ_10), sum(pooled_data_female$BSSQ_11), sum(pooled_data_female$BSSQ_12), sum(pooled_data_female$BSSQ_13), sum(pooled_data_female$BSSQ_14), sum(pooled_data_female$BSSQ_15), sum(pooled_data_female$BSSQ_16))

ASSQ_female = c(sum(pooled_data_female$ASSQ_1), sum(pooled_data_female$ASSQ_2), sum(pooled_data_female$ASSQ_3), sum(pooled_data_female$ASSQ_4), sum(pooled_data_female$ASSQ_5), sum(pooled_data_female$ASSQ_6), sum(pooled_data_female$ASSQ_7), sum(pooled_data_female$ASSQ_8), sum(pooled_data_female$ASSQ_9), sum(pooled_data_female$ASSQ_10), sum(pooled_data_female$ASSQ_11), sum(pooled_data_female$ASSQ_12), sum(pooled_data_female$ASSQ_13), sum(pooled_data_female$ASSQ_14), sum(pooled_data_female$ASSQ_15), sum(pooled_data_female$ASSQ_16))

diff_female <- data.frame(
  label = c("ASSQ_1 - BSSQ_1","ASSQ_2 - BSSQ_2","ASSQ_3 - BSSQ_3","ASSQ_4 - BSSQ_4","ASSQ_5 - BSSQ_5","ASSQ_6 - BSSQ_6","ASSQ_7 - BSSQ_7","ASSQ_8 - BSSQ_8", "ASSQ_9 - BSSQ_9", "ASSQ_10 - BSSQ_10", "ASSQ_11 - BSSQ_11", "ASSQ_12 - BSSQ_12", "ASSQ_13 - BSSQ_13", "ASSQ_14 - BSSQ_14", "ASSQ_15 - BSSQ_15","ASSQ_16 - BSSQ_16"),
  diff = ASSQ_female - BSSQ_female
)

diff_female$label <- factor(diff_female$label, levels = diff_female$label)

plot_female <- ggplot(diff_female, aes(x = label, y = diff, fill = label,
                         text = paste(label, diff, sep=": "))) +
  geom_bar(stat = "identity") +
  theme_minimal() +
  theme(
    axis.text.x  = element_blank(),
    axis.title.x = element_blank(),
    plot.title = element_text(hjust = 0.5)
  ) +
  labs(y = "ASSQ - BSSQ", title = "Difference between ASSQ and BSSQ in Female")

ggplotly(plot_female, tooltip = "text")
Code
pooled_data_vr_exp1 <- filter(pooled_data, cont_1 == "NO_SM") %>%
  group_by(VRexperience) %>%
  reframe(avg_ssq_full = mean(ssq_full))


pooled_data_vr_exp2 <- filter(pooled_data, cont_1 == "SM") %>%
  group_by(VRexperience) %>%
  reframe(avg_ssq_full = mean(ssq_full))

pooled_data$cont_1
  [1] "SM"    "NO_SM" "NO_SM" "SM"    "SM"    "SM"    "SM"    "SM"    "SM"   
 [10] "SM"    "NO_SM" "SM"    "SM"    "SM"    "SM"    "NO_SM" "SM"    "NO_SM"
 [19] "SM"    "NO_SM" "SM"    "SM"    "NO_SM" "NO_SM" "SM"    "NO_SM" "SM"   
 [28] "NO_SM" "SM"    "NO_SM" "SM"    "SM"    "NO_SM" "NO_SM" "SM"    "SM"   
 [37] "SM"    "NO_SM" "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"   
 [46] "SM"    "SM"    "SM"    "SM"    "SM"    "NO_SM" "NO_SM" "NO_SM" "SM"   
 [55] "SM"    "SM"    "SM"    "SM"    "NO_SM" "NO_SM" "SM"    "SM"    "SM"   
 [64] "NO_SM" "SM"    "NO_SM" "SM"    "SM"    "NO_SM" "SM"    "SM"    "SM"   
 [73] "SM"    "NO_SM" "NO_SM" "NO_SM" "SM"    "SM"    "SM"    "SM"    "SM"   
 [82] "SM"    "NO_SM" "SM"    "NO_SM" "NO_SM" "SM"    "SM"    "SM"    "SM"   
 [91] "SM"    "NO_SM" "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "NO_SM"
[100] "NO_SM" "SM"    "SM"    "NO_SM" "SM"    "SM"    "NO_SM" "NO_SM" "NO_SM"
[109] "SM"    "SM"    "SM"    "NO_SM" "NO_SM" "SM"    "SM"    "SM"    "SM"   
[118] "SM"    "SM"    "NO_SM" "NO_SM" "NO_SM" "SM"    "SM"    "SM"    "SM"   
[127] "SM"    "SM"    "NO_SM" "SM"    "SM"    "NO_SM" "NO_SM" "SM"    "NO_SM"
[136] "SM"    "SM"    "NO_SM" "SM"    "SM"    "NO_SM" "SM"    "SM"    "NO_SM"
[145] "SM"    "NO_SM" "NO_SM" "NO_SM" "SM"    "SM"    "SM"    "SM"    "SM"   
[154] "SM"    "NO_SM" "SM"    "NO_SM" "SM"    "NO_SM" "SM"    "NO_SM" "SM"   
[163] "SM"    "SM"    "SM"    "SM"    "NO_SM" "NO_SM" "NO_SM" "SM"    "NO_SM"
[172] "SM"    "SM"    "NO_SM" "NO_SM" "SM"    "SM"    "SM"    "SM"    "NO_SM"
[181] "SM"    "SM"    "SM"    "SM"    "NO_SM" "NO_SM" "SM"    "NO_SM" "SM"   
[190] "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"   
[199] "NO_SM" "SM"    "NO_SM" "NO_SM" "SM"    "SM"    "NO_SM" "SM"    "NO_SM"
[208] "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"   
[217] "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"   
[226] "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"   
[235] "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"   
[244] "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"   
[253] "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"   
[262] "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"   
[271] "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"   
[280] "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"   
[289] "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"   
[298] "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"   
[307] "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"   
[316] "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"   
[325] "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"    "SM"   
[334] "SM"    "SM"    "SM"   
Code
ggplot(pooled_data_vr_exp1) + 
  geom_bar(stat = "identity", aes(x = VRexperience, y = avg_ssq_full, fill = VRexperience)) +
  labs(x = "VR Experience", title = "Mean difference in Side Effect Severity", y = "Mean") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5)
  )

Code
pooled_data_vr_exp2
# A tibble: 2 × 2
  VRexperience avg_ssq_full
  <chr>               <dbl>
1 No                   14.7
2 Yes                  14.4
Code
ggplot(pooled_data_vr_exp2) + 
  geom_bar(stat = "identity", aes(x = VRexperience, y = avg_ssq_full, fill = VRexperience)) +
  labs(x = "VR Experience", title = "Mean difference in Side Effect Severity", y = "Mean") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5)
  )

Code
min(pooled_data$age)
[1] 17
Code
max(pooled_data$age)
[1] 58
Code
pooled_data_age <- mutate(filter(pooled_data, cont_1=="SM"), age_interval = case_when(
  23 >= age & age >= 18 ~ "18 - 23",
  29 >= age & age >= 24 ~ "24 - 29",
  34 >= age & age >= 30 ~ "30 - 34",
  age >= 35 ~ "35 - 40"
))

pooled_data_age %>%
  group_by(age_interval) %>%
  reframe(mean_diff = mean(ssq_full))
# A tibble: 5 × 2
  age_interval mean_diff
  <chr>            <dbl>
1 18 - 23           13.5
2 24 - 29           18.0
3 30 - 34           13.0
4 35 - 40           14.5
5 <NA>               6.5
Code
ggplot(filter(pooled_data, cont_1 == "NO_SM"), aes(x = p_stai, y = ssq_full)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
`geom_smooth()` using formula = 'y ~ x'

Code
model = lm(ssq_full ~ p_stai, data = filter(pooled_data, cont_1 == "NO_SM"))

ggplot(model, aes(x = .fitted, y = .resid)) + 
  geom_point() + # Create scatterplot
  geom_hline(yintercept = 0, linetype = "dashed", colour = "red")

Code
summary(model)

Call:
lm(formula = ssq_full ~ p_stai, data = filter(pooled_data, cont_1 == 
    "NO_SM"))

Residuals:
    Min      1Q  Median      3Q     Max 
-14.319  -7.555  -3.108   6.128  43.364 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)   0.2383     5.0824   0.047    0.963
p_stai        0.5528     0.4965   1.113    0.269

Residual standard error: 10.77 on 67 degrees of freedom
Multiple R-squared:  0.01817,   Adjusted R-squared:  0.003513 
F-statistic:  1.24 on 1 and 67 DF,  p-value: 0.2695
Code
ggplot(filter(pooled_data, cont_1 == "SM"), aes(x = p_stai, y = ssq_full)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
`geom_smooth()` using formula = 'y ~ x'

Code
ggplot(pooled_data_age, aes(x = p_stai, y = ssq_full)) +
  geom_point(aes(color = age_interval)) +
  geom_smooth(method = "lm", se = FALSE)
`geom_smooth()` using formula = 'y ~ x'

Code
model = lm(ssq_full ~ p_stai, data = pooled_data)

ggplot(model, aes(x = .fitted, y = .resid)) + # Plotting fitted values and residuals
  geom_point() + # Create scatterplot
  geom_hline(yintercept = 0, linetype = "dashed", colour = "red")

Code
summary(model)

Call:
lm(formula = ssq_full ~ p_stai, data = pooled_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-29.086 -11.427  -4.232   8.166  93.153 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -3.4661     3.3140  -1.046    0.296    
p_stai        1.3854     0.2729   5.076 6.41e-07 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 16.47 on 334 degrees of freedom
Multiple R-squared:  0.07162,   Adjusted R-squared:  0.06884 
F-statistic: 25.77 on 1 and 334 DF,  p-value: 6.412e-07
Code
ggplot(filter(pooled_data), aes(x = p_vra, y = ssq_full)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
`geom_smooth()` using formula = 'y ~ x'

Code
model1 = lm(ssq_full ~ p_vra, data = filter(pooled_data))

ggplot(model1, aes(x = .fitted, y = .resid)) + # Plotting fitted values and residuals
  geom_point() + # Create scatterplot
  geom_hline(yintercept = 0, linetype = "dashed", colour = "red")

Code
summary(model1)

Call:
lm(formula = ssq_full ~ p_vra, data = filter(pooled_data))

Residuals:
    Min      1Q  Median      3Q     Max 
-32.977 -10.385  -3.590   7.204  86.410 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   3.7997     1.4447   2.630  0.00893 ** 
p_vra         2.5968     0.3378   7.687  1.7e-13 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 15.75 on 334 degrees of freedom
Multiple R-squared:  0.1503,    Adjusted R-squared:  0.1478 
F-statistic: 59.09 on 1 and 334 DF,  p-value: 1.697e-13
Code
cor(pooled_data$age, pooled_data$ssq_full)
[1] -0.01968983
Code
ggplot(pooled_data, aes(x = age, y = ssq_full)) + 
  geom_point()+
  geom_smooth(method = "lm", se = FALSE)
`geom_smooth()` using formula = 'y ~ x'

Code
ggplot(pooled_data) +
  geom_boxplot(aes(y = PSTAI_4, x = ssq_full, fill = PSTAI_4))

Code
ggplot(pooled_data) +
  geom_boxplot(aes(y = PSTAI_6, x = ssq_full, fill = PSTAI_6))

Code
ggplot(pooled_data) +
  geom_boxplot(aes(y = PSTAI_2, x = ssq_full, fill = PSTAI_2))

Code
ggplot(pooled_data) +
  geom_boxplot(aes(y = PSTAI_1, x = ssq_full, fill = PSTAI_1))

Code
ggplot(pooled_data) +
  geom_boxplot(aes(y = PSTAI_6, x = p_e, fill = PSTAI_6))

Articles

(APA Style)

Acknowledgments

Group Meetings

Contributions

Resources

AI Usage Statement